suppressPackageStartupMessages(library("ggplot2"))
theme_set(ggpubr::theme_pubr(base_size=10, legend='bottom'))
suppressPackageStartupMessages(library("DESeq2"))
#Note that we do not have Clark Scores or MIA scores for AVAST-M lymph node samples and these need to be commented out to avoid errors.
#Load the data
tissue<-c("Skin")
load(paste("~/Desktop/Melanoma/deseq2Results/tc",tissue,"EventMetNo_VS_tc",tissue,"EventMetYes_CovariateCorrection.deseq2/de.Rdata", sep=""))
#find ENSEMBL ID corresponding to GRAMD1B
geneName<-"GRAMD1B"
select<-as.character(res.annot$id[res.annot$Name==geneName])
#extract expression of this gene
expressionData <- data.frame(assay(vsd)[select, ])
#Replace ENSEMBL IDs with corresponding gene names
names(expressionData) <- geneName
stand.fun <- function(x){(x-mean(x,na.rm=TRUE))/sd(x,na.rm=TRUE)}
stdSignature<-stand.fun(expressionData$GRAMD1B)
names(stdSignature)<-rownames(expressionData) #make sure they retain the name of the samples
#Multiply by beta coefficient?
#betaCoeff <- read.table("~/Desktop/Melanoma/betaCoeff.tsv", sep = "\t", header = TRUE, quote = "")
#extract beta coefficient for the gene of interest
#betaCoeffGene <- betaCoeff[select, "EventMet_Yes_vs_No"]
#weightedSignature <- expressionData*betaCoeffGene
#standardization of the weighted signature
#stand.fun <- function(x){(x-mean(x,na.rm=TRUE))/sd(x,na.rm=TRUE)}
#stdWeightedSignature <- stand.fun(weightedSignature$GRAMD1B)
#names(stdWeightedSignature)<-rownames(weightedSignature) #make sure they retain the name of the samples
As expected the absolute values after standardization of weighted signature (stand.fun(weightedSignature$GRAMD1B)) is same as those of standardized expression values (stand.fun(expressionData$GRAMD1B)). The only difference comes from sign of the beta coefficient which is negative for GRAMD1B
#Load clinical data
clinicalData<-data.frame(colData(vsd))
clinicalData$Signature<- stdSignature[rownames(clinicalData)] #merge signature in clinical data frame
#Check association of GRAMD1B with relapse
my_comparisons <- list( c("Yes", "No"))
ggplot(clinicalData[!is.na(clinicalData$EventMet),], aes(x=EventMet, y=Signature))+
geom_violin()+
geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
#scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
xlab("Distant metastases (AVAST-M Skin)")+
ylab("VST normalized GRAMD1B expression (stand.)")+
#geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
theme(text=element_text(size=7, family="sans"))+
ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value

#ggpubr::stat_compare_means(label.y = 6, method = "anova", size = 2.5, family="sans")#+
Find suitable cut-off. After talking to Roy, it seems that we could go for a weighted GRAMD1B value of 0? Let’s check the median value. May be I can take the median cut-off and perform chi-squared test to check if there is no difference between the mean of 2 groups?
#Divide in high/low groups based on mean and median
co_mean<-mean(clinicalData$Signature)
co_median<-median(clinicalData$Signature)
co_0_33<-quantile(clinicalData$Signature, 0.33)
co_0_66<-quantile(clinicalData$Signature, 0.66)
if (tissue=="Skin") {
co_density_intersection<--0.223156179#0.235202939
}else{
co_density_intersection<-0.3481057115#-0.223156179#0.235202939
}
suppressPackageStartupMessages(library(plotly))
p<-ggplot(data = clinicalData, aes(x = Signature, fill=EventMet)) +
geom_density(alpha=0.3)+
geom_vline(xintercept=c(co_mean, co_median, co_0_33, co_0_66, co_density_intersection), linetype=c('twodash', 'longdash', 'dotted', 'dotdash', 'dashed'))+
ggtitle(tissue)
ggplotly(p)
clinicalData$SignatureGroupMean <- as.factor(ifelse(clinicalData$Signature >= as.numeric(co_mean), "High", "Low"))
clinicalData$SignatureGroupMedian <- as.factor(ifelse(clinicalData$Signature >= as.numeric(co_median), "High", "Low"))
clinicalData$SignatureGroup0_33 <- as.factor(ifelse(clinicalData$Signature >= as.numeric(co_0_33), "High", "Low"))
clinicalData$SignatureGroup0_66 <- as.factor(ifelse(clinicalData$Signature >= as.numeric(co_0_66), "High", "Low"))
clinicalData$SignatureGroupDensityIntersection <- as.factor(ifelse(clinicalData$Signature >= as.numeric(co_density_intersection), "High", "Low"))
print(chisq.test(clinicalData$SignatureGroupMean, clinicalData$EventMet))
Pearson's Chi-squared test with Yates' continuity correction
data: clinicalData$SignatureGroupMean and clinicalData$EventMet
X-squared = 9.0843, df = 1, p-value = 0.002578
print(chisq.test(clinicalData$SignatureGroupMedian, clinicalData$EventMet))
Pearson's Chi-squared test with Yates' continuity correction
data: clinicalData$SignatureGroupMedian and clinicalData$EventMet
X-squared = 8.3039, df = 1, p-value = 0.003956
print(chisq.test(clinicalData$SignatureGroup0_33, clinicalData$EventMet))
Pearson's Chi-squared test with Yates' continuity correction
data: clinicalData$SignatureGroup0_33 and clinicalData$EventMet
X-squared = 13.838, df = 1, p-value = 0.0001992
print(chisq.test(clinicalData$SignatureGroup0_66, clinicalData$EventMet))
Pearson's Chi-squared test with Yates' continuity correction
data: clinicalData$SignatureGroup0_66 and clinicalData$EventMet
X-squared = 8.8431, df = 1, p-value = 0.002942
print(chisq.test(clinicalData$SignatureGroupDensityIntersection, clinicalData$EventMet))
Pearson's Chi-squared test with Yates' continuity correction
data: clinicalData$SignatureGroupDensityIntersection and clinicalData$EventMet
X-squared = 9.852, df = 1, p-value = 0.001696
#Check association of NRAS with other clinical covariates
print(chisq.test(clinicalData$NRAS, clinicalData$EventMet))
Pearson's Chi-squared test with Yates' continuity correction
data: clinicalData$NRAS and clinicalData$EventMet
X-squared = 2.5747, df = 1, p-value = 0.1086
print(chisq.test(clinicalData$NRAS, clinicalData$Bres))
Pearson's Chi-squared test
data: clinicalData$NRAS and clinicalData$Bres
X-squared = 3.0461, df = 2, p-value = 0.2181
print(chisq.test(clinicalData$NRAS, clinicalData$Ulc))
Pearson's Chi-squared test with Yates' continuity correction
data: clinicalData$NRAS and clinicalData$Ulc
X-squared = 0.16538, df = 1, p-value = 0.6843
#Check association of GRAMD1B with ulceration
my_comparisons <- list( c("Present", "Absent"))
ggplot(clinicalData[!is.na(clinicalData$Ulc),], aes(x=Ulc, y=Signature))+
geom_violin()+
geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
#scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
xlab("Ulc (AVAST-M Skin)")+
ylab("VST normalized GRAMD1B expression (stand.)")+
#geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
theme(text=element_text(size=7, family="sans"))+
ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value

#ggpubr::stat_compare_means(label.y = 6, method = "anova", size = 2.5, family="sans")#+
#Check association of GRAMD1B with Bres
my_comparisons <- list( c("<= 2.0 mm", ">2-4mm"), c("<= 2.0 mm", ">4.0mm"), c(">2-4mm", ">4.0mm"))
ggplot(clinicalData[!is.na(clinicalData$Bres),], aes(x=Bres, y=Signature))+
geom_violin()+
geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
#scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
xlab("Bres (AVAST-M Skin)")+
ylab("VST normalized GRAMD1B expression (stand.)")+
#geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
theme(text=element_text(size=7, family="sans"))+
ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")+ # Add pairwise comparisons p-value
ggpubr::stat_compare_means(label.y = 6, method = "anova", size = 2.5, family="sans")#+

#Check association of GRAMD1B with Stage
my_comparisons <- list( c("IIB", "IIC"), c("IIB", "IIIA"), c("IIB", "IIIB"), c("IIB", "IIIC"),
c("IIC", "IIIA"), c("IIC", "IIIB"), c("IIC", "IIIC"),
c("IIIA", "IIIB"), c("IIIA", "IIIC"),
c("IIIB", "IIIC"))
ggplot(clinicalData[!is.na(clinicalData$Stage),], aes(x=Stage, y=Signature))+
geom_violin()+
geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
#scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
xlab("Stage (AVAST-M Skin)")+
ylab("VST normalized GRAMD1B expression (stand.)")+
#geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
theme(text=element_text(size=7, family="sans"))+
ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")+ # Add pairwise comparisons p-value
ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")#+

ggplot(clinicalData[!is.na(clinicalData$Stage_binary),], aes(x=Stage_binary, y=Signature))+
geom_violin()+
geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
#scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
xlab("Stage (AVAST-M Skin)")+
ylab("VST normalized GRAMD1B expression (stand.)")+
#geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
theme(text=element_text(size=7, family="sans"))+
ggpubr::stat_compare_means(method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value

#ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")
#Check association of GRAMD1B with Site
my_comparisons <- list( c("Head and neck", "Lower lims"), c("Head and neck", "Trunk"), c("Head and neck", "Upper limbs"),
c("Lower lims", "Trunk"), c("Lower lims", "Upper limbs"),
c("Trunk", "Upper limbs"))
ggplot(clinicalData[!is.na(clinicalData$Site),], aes(x=Site, y=Signature))+
geom_violin()+
geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
#scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
xlab("Site (AVAST-M Skin)")+
ylab("VST normalized GRAMD1B expression (stand.)")+
#geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
theme(text=element_text(size=7, family="sans"))+
ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")+ # Add pairwise comparisons p-value
ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")#+

#Check association of GRAMD1B with BRAF mutation
my_comparisons <- list( c("V600E", "WT"))
ggplot(clinicalData[!is.na(clinicalData$BRAF),], aes(x=BRAF, y=Signature))+
geom_violin()+
geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
#scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
xlab("BRAF (AVAST-M Skin)")+
ylab("VST normalized GRAMD1B expression (stand.)")+
#geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
theme(text=element_text(size=7, family="sans"))+
ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value

#ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")#+
#Check association of GRAMD1B with NRAS mutation
my_comparisons <- list( c("Mutant", "WT"))
ggplot(clinicalData[!is.na(clinicalData$NRAS),], aes(x=NRAS, y=Signature))+
geom_violin()+
geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
#scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
xlab("NRAS (AVAST-M Skin)")+
ylab("VST normalized GRAMD1B expression (stand.)")+
#geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
theme(text=element_text(size=7, family="sans"))+
ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value

#ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")#+
#Check association of GRAMD1B with TIL counts
jt_ns_sm_scores = read.table("../../../JT_NS_SM_Combined.xls - JT_NS_SM.tsv", sep = "\t", header = TRUE, quote = "")
combinedScore <- data.frame("Scanned_file.me"= jt_ns_sm_scores$JT_Scanned_file.me,
"R.Seq_sampleID"= jt_ns_sm_scores$JT_R.Seq_sampleID,
"ClarkScore"= ifelse(jt_ns_sm_scores$JT_Clark_score==jt_ns_sm_scores$NS_Clark_score,
jt_ns_sm_scores$JT_Clark_score,
jt_ns_sm_scores$SM_Clark_score),
"MIAScore"= ifelse(jt_ns_sm_scores$JT_TIL_grade==jt_ns_sm_scores$NS_TIL.GRADE,
jt_ns_sm_scores$JT_TIL_grade,
jt_ns_sm_scores$SM_TIL_grade))
#Remove duplicated entries
combinedScore<-combinedScore[!duplicated(combinedScore$R.Seq_sampleID), ]
rownames(combinedScore)<-combinedScore$R.Seq_sampleID #rename rownames with sample ids
rownames(clinicalData)<-clinicalData$RNA.Seq.Sample
df<-data.frame("EventMet"=clinicalData$EventMet,
"ClarkScore"=combinedScore[rownames(clinicalData), "ClarkScore"],
"MIAScore"=combinedScore[rownames(clinicalData), "MIAScore"],
"Signature"=clinicalData$Signature)
rownames(df)<-rownames(clinicalData)
df$ClarkScore <- as.factor(df$ClarkScore)
levels(df$ClarkScore) <- c("absent", "nonbrisk", "brisk")
my_comparisons <- list( c("absent", "nonbrisk"), c("nonbrisk", "brisk"), c("absent", "brisk") )
#df$JT_Clark_score <- factor(df$JT_Clark_score, levels = c("absent", "nonbrisk", "brisk"))
ggplot(df[!is.na(df$ClarkScore), ], aes(x=ClarkScore, y=Signature#, color=DRBrain
))+
geom_violin()+
geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
#scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
xlab("Clark score (AVAST-M Skin)")+
ylab("VST normalized GRAMD1B expression (stand.)")+
#geom_hline(yintercept = quantile(df$Signature, 0.75), color = "grey", linetype = "longdash")+
theme(text=element_text(size=7, family="sans"))+
ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")+ # Add pairwise comparisons p-value
ggpubr::stat_compare_means(label.y = 6, method = "anova", size = 2.5, family="sans")#+

# facet_wrap(~DRBrain)# Add global p-value
my_comparisons <- list( c("0", "1"), c("0", "2"), c("0", "3"), c("1", "2"), c("1", "3"), c("2", "3"))
df$MIAScore <- as.factor(df$MIAScore)
ggplot(df[!is.na(df$MIAScore), ], aes(x=MIAScore, y=Signature#, color=DRBrain
))+
geom_violin()+
geom_jitter(height = 0, width = 0.1, alpha = 0.5)+
scale_fill_brewer(type="qual", palette = "Dark2", name = "MIA score")+
xlab("MIA score (AVAST-M Skin)")+
ylab("VST normalized GRAMD1B expression (stand.)")+
#geom_hline(yintercept = quantile(df$Signature, 0.75), color = "grey", linetype = "longdash")+
theme(text=element_text(size=7, family="sans"))+
ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")+ # Add pairwise comparisons p-value
ggpubr::stat_compare_means(label.y = 6, method = "anova", size = 2.5, family="sans")#+

#facet_wrap(~DRBrain)
# Add global p-value
#my_comparisons <- list( c("Mutant", "WT"))
ggplot(clinicalData[!is.na(clinicalData$treatment),], aes(x=treatment, y=Signature))+
geom_violin()+
geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
#scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
xlab("Treatment (AVAST-M Skin)")+
ylab("VST normalized GRAMD1B expression (stand.)")+
#geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
theme(text=element_text(size=7, family="sans"))+
ggpubr::stat_compare_means(method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value

#ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")#+
#my_comparisons <- list( c("Mutant", "WT"))
ggplot(clinicalData[!is.na(clinicalData$ECOG),], aes(x=ECOG, y=Signature))+
geom_violin()+
geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
#scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
xlab("ECOG (AVAST-M Skin)")+
ylab("VST normalized GRAMD1B expression (stand.)")+
#geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
theme(text=element_text(size=7, family="sans"))+
ggpubr::stat_compare_means(method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value

#ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")#+
#Perform differential expression analysis
combinedScore$ClarkScore <- as.factor(combinedScore$ClarkScore)
levels(combinedScore$ClarkScore) <- c("absent", "nonbrisk", "brisk")
clinicalData$ClarkScore<-combinedScore[rownames(clinicalData), "ClarkScore"]
#merge(clinicalData, combinedScore, by.x="RNA.Seq.Sample", by.y="R.Seq_sampleID")
#rownames(clinicalData)<-clinicalData$RNA.Seq.Sample
#clinicalData$ClarkScore=combinedScore$ClarkScore[rownames(clinicalData)]
#While accounting for Stage, NRAS mutation, TIL score and EventMet
#Subset to only those samples which have non-missing entires for these covariates
clinicalData_sub<-clinicalData[(!is.na(clinicalData$Stage))&(!is.na(clinicalData$NRAS))&(!is.na(clinicalData$ClarkScore))&(!is.na(clinicalData$EventMet)), ]
data.f_sub<-data.f[, rownames(clinicalData_sub)]
table(clinicalData_sub$SignatureGroupMean)
High Low
34 32
dds1 <- DESeqDataSetFromMatrix(countData = data.f_sub,
colData = clinicalData_sub,
design = ~ Stage+NRAS+ClarkScore+EventMet+SignatureGroupMean)
totalcounts1.gene = rowSums(counts(dds1))
dds1 <- dds1[totalcounts1.gene>=10,]
# dimensions post filtering
m = nrow(dds1)
n = ncol(dds1)
dds1 <- DESeq(dds1)
using pre-existing size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
11 rows did not converge in beta, labelled in mcols(object)$betaConv. Use larger maxit argument with nbinomWaldTest
res1 <- results(dds1)
res1
log2 fold change (MLE): SignatureGroupMean Low vs High
Wald test p-value: SignatureGroupMean Low vs High
DataFrame with 37079 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue padj
<numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
ENSG00000000003 861.278 0.11366862 0.1629208 0.69769243 0.48536957 0.8133089
ENSG00000000005 33.432 -0.69252919 0.4546602 -1.52317957 0.12771379 0.4931014
ENSG00000000419 5709.545 -0.00041328 0.1010882 -0.00408831 0.99673801 0.9993295
ENSG00000000457 1391.790 0.16335820 0.0957011 1.70696327 0.08782888 0.4188545
ENSG00000000460 1956.996 0.39953052 0.1421792 2.81004990 0.00495338 0.0840005
... ... ... ... ... ... ...
ENSG00000284738 6.408754 0.272156 0.431137 0.631254 0.5278747 0.835093
ENSG00000284740 6.870408 0.591347 0.350720 1.686092 0.0917782 0.425316
ENSG00000284744 22.946725 -0.591098 0.374356 -1.578974 0.1143421 0.470308
ENSG00000284747 6.877460 -0.574416 0.327618 -1.753310 0.0795488 0.399437
ENSG00000284748 0.235384 -0.320125 1.496303 -0.213944 0.8305906 NA
#Merge with gene names and check if the differentially expressed genes make sense
res<-data.frame(res1)
res<-res[order(res$padj), ]
res$Name<-res.annot[rownames(res), "Name"]
head(res)
#GRAMD1B is differentially expressed in GRAMD1B groups and is downregulated in the low expression group compared to the high-expression group so atleast this makes sense.
#Perform lfc shrinkage for GSEA
library("apeglm")
coefName = resultsNames(dds1)
resWithLfcShrinkage = data.frame(lfcShrink(dds1, coef = tail(coefName, n=1), type="apeglm"))
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
sequence count data: removing the noise and preserving large differences.
Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
#Add suffix to colnames for easy identification later on.
colnames(resWithLfcShrinkage) = paste(colnames(resWithLfcShrinkage),"lfcShrinkApplied", sep = "_")
#Add ENSEMBL ID as a column in the data frame to ease merging later
resWithLfcShrinkage$id = rownames(resWithLfcShrinkage)
#Merge these with DE original DE results
res$id<-rownames(res)
deResultsUpdated = merge(x = res, y = data.frame(resWithLfcShrinkage), by = "id")
deResultsUpdated = deResultsUpdated[order(deResultsUpdated$padj),]
#Save the results
write.table(deResultsUpdated[deResultsUpdated$padj_lfcShrinkApplied<0.1, ], file = "../results/de_gsea/deWithLfcSkrinkageApplied_GRAMD1B_all.tsv", col.names = TRUE, row.names = FALSE, quote = FALSE, sep = '\t')
#Save the differentially expressed genes (padj<0.1)
write.table(deResultsUpdated[deResultsUpdated$padj_lfcShrinkApplied<0.1, ], file = "../results/de_gsea/deWithLfcSkrinkageApplied_GRAMD1B_padj_0_1.tsv", col.names = TRUE, row.names = FALSE, quote = FALSE, sep = '\t')
head(deResultsUpdated)
#Create a ranked list for GSEA
#Create ranked list for running Pre-ranked GSEA tool
rankedList = na.omit(data.frame(deResultsUpdated$Name, deResultsUpdated$log2FoldChange_lfcShrinkApplied))
rankedList = rankedList[with(rankedList, order(deResultsUpdated.log2FoldChange_lfcShrinkApplied)), ]
#Save the ranked list to a new file for GSEA.
write.table(rankedList, file = "../results/de_gsea/rankedList_GRAMD1B.rnk", col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t')
#Plot survival curves between the continuous signature and the two groups ## First add necesssary columns
suppressPackageStartupMessages(library("survival"))
# survival outcome 1: "d" for death and "ltrc" for left truncated and right censored
clinicalData$survival_d_ltrc = Surv(time = as.numeric(difftime(clinicalData$DOE, clinicalData$DDiag))/365.25,
time2 = as.numeric(difftime(clinicalData$DOC, clinicalData$DDiag))/365.25,
event = ifelse(clinicalData$Dead == FALSE, 0, 1))
# survival outcome 2: "rd" for relapse or death, "ltrc" for left truncated and right censored
clinicalData$survival_rd_ltrc = Surv(time = as.numeric(difftime(clinicalData$DOE, clinicalData$DDiag))/365.25,
time2 = as.numeric(difftime(apply(clinicalData[, c("DOC", "DDistMets")],1,min,na.rm=TRUE), clinicalData$DDiag))/365.25,
event = ifelse((clinicalData$Dead == FALSE & clinicalData$EventMet == "No"), 0, 1))
if(tissue=="Skin"){
jt_ns_sm_scores = read.table("../JT_NS_SM_Combined.xls - JT_NS_SM.tsv", sep = "\t", header = TRUE, quote = "")
combinedScore <- data.frame("Scanned_file.me"= jt_ns_sm_scores$JT_Scanned_file.me,
"R.Seq_sampleID"= jt_ns_sm_scores$JT_R.Seq_sampleID,
"ClarkScore"=ifelse(jt_ns_sm_scores$JT_Clark_score==jt_ns_sm_scores$NS_Clark_score,
jt_ns_sm_scores$JT_Clark_score,
jt_ns_sm_scores$SM_Clark_score),
"MIAScore"= ifelse(jt_ns_sm_scores$JT_TIL_grade==jt_ns_sm_scores$NS_TIL.GRADE,
jt_ns_sm_scores$JT_TIL_grade,
jt_ns_sm_scores$SM_TIL_grade))
#Remove duplicated entries
combinedScore<-combinedScore[!duplicated(combinedScore$R.Seq_sampleID), ]
rownames(combinedScore)<-combinedScore$R.Seq_sampleID #rename rownames with sample ids
combinedScore$ClarkScore <- as.factor(combinedScore$ClarkScore)
levels(combinedScore$ClarkScore) <- c("absent", "nonbrisk", "brisk")
clinicalData<-merge(clinicalData, combinedScore, by.x="RNA.Seq.Sample", by.y="R.Seq_sampleID")
rownames(clinicalData)<-clinicalData$RNA.Seq.Sample
}
then calculate the values
os<-data.frame()
pfs<-data.frame()
temp<-data.frame()
combinations<-c("Signature", "SignatureGroupMean", "SignatureGroupMedian", "SignatureGroup0_33", "SignatureGroup0_66", "SignatureGroupDensityIntersection")
for (combination in combinations) {
if (tissue=="Skin") {
os_formula<-paste0("survival_d_ltrc~",combination,"+Sex+Age+as.numeric(Nclass)+as.character(Stage)+ECOG+treatment+ClarkScore")
pfs_formula<-paste0("survival_rd_ltrc~",combination,"+Sex+Age+as.numeric(Nclass)+as.character(Stage)+ECOG+treatment+ClarkScore")
} else{
os_formula<-paste0("survival_d_ltrc~",combination,"+Sex+Age+as.numeric(Nclass)+as.character(Stage)+ECOG+treatment")
pfs_formula<-paste0("survival_rd_ltrc~",combination,"+Sex+Age+as.numeric(Nclass)+as.character(Stage)+ECOG+treatment")
}
fit = coef(summary(coxph(as.formula(os_formula), data=clinicalData)))
mid = fit[1,c("exp(coef)")]
low = exp(fit[1,c("coef")]-
qnorm(.975)*fit[1,c("se(coef)")])
high = exp(fit[1,c("coef")]+
qnorm(.975)*fit[1,c("se(coef)")])
pval = fit[1,c("Pr(>|z|)")]
temp["Signature",c("Signature", "HR","low","high","pval")] = c(rownames(fit)[1],mid,low,high,pval)
os<-rbind(os, temp)
fit = coef(summary(coxph(as.formula(pfs_formula), data=clinicalData)))
mid = fit[1,c("exp(coef)")]
low = exp(fit[1,c("coef")]-
qnorm(.975)*fit[1,c("se(coef)")])
high = exp(fit[1,c("coef")]+
qnorm(.975)*fit[1,c("se(coef)")])
pval = fit[1,c("Pr(>|z|)")]
temp["Signature",c("Signature", "HR","low","high","pval")] = c(rownames(fit)[1],mid,low,high,pval)
pfs<-rbind(pfs, temp)
}
write.table(os, paste("../results/survival/AVAST-M_",tissue,"_os.tsv", sep=""), sep="\t", col.names = T, row.names = F)
write.table(pfs, paste("../results/survival/AVAST-M_",tissue,"_pfs.tsv", sep=""), sep="\t", col.names = T, row.names = F)
---
title: "R Notebook"
output: html_notebook
---

```{r}
suppressPackageStartupMessages(library("ggplot2"))
theme_set(ggpubr::theme_pubr(base_size=10, legend='bottom'))
```

```{r}
suppressPackageStartupMessages(library("DESeq2"))
```

#Note that we do not have Clark Scores or MIA scores for AVAST-M lymph node samples and these need to be commented out to avoid errors. 
```{r}
#Load the data
tissue<-c("Skin")
load(paste("~/Desktop/Melanoma/deseq2Results/tc",tissue,"EventMetNo_VS_tc",tissue,"EventMetYes_CovariateCorrection.deseq2/de.Rdata", sep=""))
```

```{r}
#find ENSEMBL ID corresponding to GRAMD1B
geneName<-"GRAMD1B"
select<-as.character(res.annot$id[res.annot$Name==geneName])
#extract expression of this gene
expressionData <- data.frame(assay(vsd)[select, ])
#Replace ENSEMBL IDs with corresponding gene names
names(expressionData) <- geneName
stand.fun <- function(x){(x-mean(x,na.rm=TRUE))/sd(x,na.rm=TRUE)}
stdSignature<-stand.fun(expressionData$GRAMD1B)
names(stdSignature)<-rownames(expressionData) #make sure they retain the name of the samples
```

```{r}
#Multiply by beta coefficient?
#betaCoeff <- read.table("~/Desktop/Melanoma/betaCoeff.tsv", sep = "\t", header = TRUE, quote = "")
#extract beta coefficient for the gene of interest
#betaCoeffGene <- betaCoeff[select, "EventMet_Yes_vs_No"]
#weightedSignature <- expressionData*betaCoeffGene
#standardization of the weighted signature
#stand.fun <- function(x){(x-mean(x,na.rm=TRUE))/sd(x,na.rm=TRUE)}
#stdWeightedSignature <- stand.fun(weightedSignature$GRAMD1B)
#names(stdWeightedSignature)<-rownames(weightedSignature) #make sure they retain the name of the samples
```

As expected the absolute values after standardization of weighted signature (stand.fun(weightedSignature\$GRAMD1B)) is same as those of standardized expression values (stand.fun(expressionData\$GRAMD1B)). The only difference comes from sign of the beta coefficient which is negative for GRAMD1B

```{r}
#Load clinical data
clinicalData<-data.frame(colData(vsd))
clinicalData$Signature<- stdSignature[rownames(clinicalData)] #merge signature in clinical data frame
```

#Check association of GRAMD1B with relapse
```{r}
my_comparisons <- list( c("Yes", "No"))
ggplot(clinicalData[!is.na(clinicalData$EventMet),], aes(x=EventMet, y=Signature))+
  geom_violin()+
  geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
  #scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
  xlab("Distant metastases (AVAST-M Skin)")+
  ylab("VST normalized GRAMD1B expression (stand.)")+
  #geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
  theme(text=element_text(size=7,  family="sans"))+
  ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value
  #ggpubr::stat_compare_means(label.y = 6, method = "anova", size = 2.5, family="sans")#+

```
Find suitable cut-off. After talking to Roy, it seems that we could go for a weighted GRAMD1B value of 0?
Let's check the median value. May be I can take the median cut-off and perform chi-squared test to check if there is no difference between the mean of 2 groups?
```{r}
#Divide in high/low groups based on mean and median
co_mean<-mean(clinicalData$Signature)
co_median<-median(clinicalData$Signature)
co_0_33<-quantile(clinicalData$Signature, 0.33)
co_0_66<-quantile(clinicalData$Signature, 0.66)
if (tissue=="Skin") {
  co_density_intersection<--0.223156179#0.235202939
}else{
  co_density_intersection<-0.3481057115#-0.223156179#0.235202939
}
suppressPackageStartupMessages(library(plotly))
p<-ggplot(data = clinicalData, aes(x = Signature, fill=EventMet)) +
  geom_density(alpha=0.3)+
  geom_vline(xintercept=c(co_mean, co_median, co_0_33, co_0_66, co_density_intersection), linetype=c('twodash', 'longdash', 'dotted', 'dotdash', 'dashed'))+
  ggtitle(tissue)
ggplotly(p) 
```

```{r}
clinicalData$SignatureGroupMean <- as.factor(ifelse(clinicalData$Signature >= as.numeric(co_mean), "High", "Low"))
clinicalData$SignatureGroupMedian <- as.factor(ifelse(clinicalData$Signature >= as.numeric(co_median), "High", "Low"))
clinicalData$SignatureGroup0_33 <- as.factor(ifelse(clinicalData$Signature >= as.numeric(co_0_33), "High", "Low"))
clinicalData$SignatureGroup0_66 <- as.factor(ifelse(clinicalData$Signature >= as.numeric(co_0_66), "High", "Low"))
clinicalData$SignatureGroupDensityIntersection <- as.factor(ifelse(clinicalData$Signature >= as.numeric(co_density_intersection), "High", "Low"))

print(chisq.test(clinicalData$SignatureGroupMean, clinicalData$EventMet))
print(chisq.test(clinicalData$SignatureGroupMedian, clinicalData$EventMet))
print(chisq.test(clinicalData$SignatureGroup0_33, clinicalData$EventMet))
print(chisq.test(clinicalData$SignatureGroup0_66, clinicalData$EventMet))
print(chisq.test(clinicalData$SignatureGroupDensityIntersection, clinicalData$EventMet))
```
#Check association of NRAS with other clinical covariates
```{r}
print(chisq.test(clinicalData$NRAS, clinicalData$EventMet))
print(chisq.test(clinicalData$NRAS, clinicalData$Bres))
print(chisq.test(clinicalData$NRAS, clinicalData$Ulc))
```
#Check association of GRAMD1B with ulceration
```{r}
my_comparisons <- list( c("Present", "Absent"))
ggplot(clinicalData[!is.na(clinicalData$Ulc),], aes(x=Ulc, y=Signature))+
  geom_violin()+
  geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
  #scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
  xlab("Ulc (AVAST-M Skin)")+
  ylab("VST normalized GRAMD1B expression (stand.)")+
  #geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
  theme(text=element_text(size=7,  family="sans"))+
  ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value
  #ggpubr::stat_compare_means(label.y = 6, method = "anova", size = 2.5, family="sans")#+
```

#Check association of GRAMD1B with Bres
```{r}
my_comparisons <- list( c("<= 2.0 mm", ">2-4mm"), c("<= 2.0 mm", ">4.0mm"), c(">2-4mm", ">4.0mm"))
ggplot(clinicalData[!is.na(clinicalData$Bres),], aes(x=Bres, y=Signature))+
  geom_violin()+
  geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
  #scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
  xlab("Bres (AVAST-M Skin)")+
  ylab("VST normalized GRAMD1B expression (stand.)")+
  #geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
  theme(text=element_text(size=7,  family="sans"))+
  ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")+ # Add pairwise comparisons p-value
  ggpubr::stat_compare_means(label.y = 6, method = "anova", size = 2.5, family="sans")#+
```

#Check association of GRAMD1B with Stage
```{r}
my_comparisons <- list( c("IIB", "IIC"), c("IIB", "IIIA"), c("IIB", "IIIB"), c("IIB", "IIIC"),
                        c("IIC", "IIIA"), c("IIC", "IIIB"), c("IIC", "IIIC"),
                        c("IIIA", "IIIB"), c("IIIA", "IIIC"),
                        c("IIIB", "IIIC"))
ggplot(clinicalData[!is.na(clinicalData$Stage),], aes(x=Stage, y=Signature))+
  geom_violin()+
  geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
  #scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
  xlab("Stage (AVAST-M Skin)")+
  ylab("VST normalized GRAMD1B expression (stand.)")+
  #geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
  theme(text=element_text(size=7,  family="sans"))+
  ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")+ # Add pairwise comparisons p-value
  ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")#+
```
```{r}
ggplot(clinicalData[!is.na(clinicalData$Stage_binary),], aes(x=Stage_binary, y=Signature))+
  geom_violin()+
  geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
  #scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
  xlab("Stage (AVAST-M Skin)")+
  ylab("VST normalized GRAMD1B expression (stand.)")+
  #geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
  theme(text=element_text(size=7,  family="sans"))+
  ggpubr::stat_compare_means(method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value
  #ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")
```

#Check association of GRAMD1B with Site
```{r}
my_comparisons <- list( c("Head and neck", "Lower lims"), c("Head and neck", "Trunk"), c("Head and neck", "Upper limbs"),
                        c("Lower lims", "Trunk"), c("Lower lims", "Upper limbs"),
                        c("Trunk", "Upper limbs"))
ggplot(clinicalData[!is.na(clinicalData$Site),], aes(x=Site, y=Signature))+
  geom_violin()+
  geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
  #scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
  xlab("Site (AVAST-M Skin)")+
  ylab("VST normalized GRAMD1B expression (stand.)")+
  #geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
  theme(text=element_text(size=7,  family="sans"))+
  ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")+ # Add pairwise comparisons p-value
  ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")#+

```

#Check association of GRAMD1B with BRAF mutation
```{r}
my_comparisons <- list( c("V600E", "WT"))
ggplot(clinicalData[!is.na(clinicalData$BRAF),], aes(x=BRAF, y=Signature))+
  geom_violin()+
  geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
  #scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
  xlab("BRAF (AVAST-M Skin)")+
  ylab("VST normalized GRAMD1B expression (stand.)")+
  #geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
  theme(text=element_text(size=7,  family="sans"))+
  ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value
  #ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")#+
```

#Check association of GRAMD1B with NRAS mutation
```{r}
my_comparisons <- list( c("Mutant", "WT"))
ggplot(clinicalData[!is.na(clinicalData$NRAS),], aes(x=NRAS, y=Signature))+
  geom_violin()+
  geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
  #scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
  xlab("NRAS (AVAST-M Skin)")+
  ylab("VST normalized GRAMD1B expression (stand.)")+
  #geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
  theme(text=element_text(size=7,  family="sans"))+
  ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value
  #ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")#+
```

#Check association of GRAMD1B with TIL counts
```{r}
jt_ns_sm_scores = read.table("../../../JT_NS_SM_Combined.xls - JT_NS_SM.tsv", sep = "\t", header = TRUE, quote = "")
combinedScore <- data.frame("Scanned_file.me"= jt_ns_sm_scores$JT_Scanned_file.me, 
                                "R.Seq_sampleID"= jt_ns_sm_scores$JT_R.Seq_sampleID, 
                                "ClarkScore"= ifelse(jt_ns_sm_scores$JT_Clark_score==jt_ns_sm_scores$NS_Clark_score,
                            jt_ns_sm_scores$JT_Clark_score, 
                            jt_ns_sm_scores$SM_Clark_score),
                           "MIAScore"= ifelse(jt_ns_sm_scores$JT_TIL_grade==jt_ns_sm_scores$NS_TIL.GRADE,
                            jt_ns_sm_scores$JT_TIL_grade, 
                            jt_ns_sm_scores$SM_TIL_grade))
#Remove duplicated entries
combinedScore<-combinedScore[!duplicated(combinedScore$R.Seq_sampleID), ]
rownames(combinedScore)<-combinedScore$R.Seq_sampleID #rename rownames with sample ids
```

```{r}
rownames(clinicalData)<-clinicalData$RNA.Seq.Sample
df<-data.frame("EventMet"=clinicalData$EventMet,
               "ClarkScore"=combinedScore[rownames(clinicalData), "ClarkScore"],
               "MIAScore"=combinedScore[rownames(clinicalData), "MIAScore"],
               "Signature"=clinicalData$Signature)
rownames(df)<-rownames(clinicalData)

df$ClarkScore <- as.factor(df$ClarkScore)
levels(df$ClarkScore) <- c("absent", "nonbrisk", "brisk")
```

```{r}
my_comparisons <- list( c("absent", "nonbrisk"), c("nonbrisk", "brisk"), c("absent", "brisk") )
#df$JT_Clark_score <- factor(df$JT_Clark_score, levels = c("absent", "nonbrisk", "brisk"))
ggplot(df[!is.na(df$ClarkScore), ], aes(x=ClarkScore, y=Signature#, color=DRBrain
                                        ))+
  geom_violin()+
  geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
  #scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
  xlab("Clark score (AVAST-M Skin)")+
  ylab("VST normalized GRAMD1B expression (stand.)")+
  #geom_hline(yintercept = quantile(df$Signature, 0.75), color = "grey", linetype = "longdash")+
  theme(text=element_text(size=7,  family="sans"))+
  ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")+ # Add pairwise comparisons p-value
  ggpubr::stat_compare_means(label.y = 6, method = "anova", size = 2.5, family="sans")#+
 # facet_wrap(~DRBrain)# Add global p-value
```

```{r}
my_comparisons <- list( c("0", "1"), c("0", "2"), c("0", "3"), c("1", "2"), c("1", "3"), c("2", "3"))
df$MIAScore <- as.factor(df$MIAScore)
ggplot(df[!is.na(df$MIAScore), ], aes(x=MIAScore, y=Signature#, color=DRBrain
                                      ))+
  geom_violin()+
  geom_jitter(height = 0, width = 0.1, alpha = 0.5)+
  scale_fill_brewer(type="qual", palette = "Dark2", name = "MIA score")+
  xlab("MIA score (AVAST-M Skin)")+
  ylab("VST normalized GRAMD1B expression (stand.)")+
  #geom_hline(yintercept = quantile(df$Signature, 0.75), color = "grey", linetype = "longdash")+
  theme(text=element_text(size=7,  family="sans"))+
  ggpubr::stat_compare_means(comparisons = my_comparisons, method = "t.test", size = 2.5, family="sans")+ # Add pairwise comparisons p-value
  ggpubr::stat_compare_means(label.y = 6, method = "anova", size = 2.5, family="sans")#+
  #facet_wrap(~DRBrain)
     # Add global p-value

```
```{r}
#my_comparisons <- list( c("Mutant", "WT"))
ggplot(clinicalData[!is.na(clinicalData$treatment),], aes(x=treatment, y=Signature))+
  geom_violin()+
  geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
  #scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
  xlab("Treatment (AVAST-M Skin)")+
  ylab("VST normalized GRAMD1B expression (stand.)")+
  #geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
  theme(text=element_text(size=7,  family="sans"))+
  ggpubr::stat_compare_means(method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value
  #ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")#+
```
```{r}
#my_comparisons <- list( c("Mutant", "WT"))
ggplot(clinicalData[!is.na(clinicalData$ECOG),], aes(x=ECOG, y=Signature))+
  geom_violin()+
  geom_jitter(height = 0, width = 0.1, alpha = 0.5, aes())+
  #scale_fill_brewer(type="qual", palette = "Dark2", name = "Cam_121 risk group")+
  xlab("ECOG (AVAST-M Skin)")+
  ylab("VST normalized GRAMD1B expression (stand.)")+
  #geom_hline(yintercept = quantile(clinicalData$Signature, 0.75), color = "grey", linetype = "longdash")+
  theme(text=element_text(size=7,  family="sans"))+
  ggpubr::stat_compare_means(method = "t.test", size = 2.5, family="sans")#+ # Add pairwise comparisons p-value
  #ggpubr::stat_compare_means(label.y = 9, method = "anova", size = 2.5, family="sans")#+
```

#Perform differential expression analysis
```{r}
combinedScore$ClarkScore <- as.factor(combinedScore$ClarkScore)
levels(combinedScore$ClarkScore) <- c("absent", "nonbrisk", "brisk")
clinicalData$ClarkScore<-combinedScore[rownames(clinicalData), "ClarkScore"]
#merge(clinicalData, combinedScore, by.x="RNA.Seq.Sample", by.y="R.Seq_sampleID")
#rownames(clinicalData)<-clinicalData$RNA.Seq.Sample
#clinicalData$ClarkScore=combinedScore$ClarkScore[rownames(clinicalData)]
```

```{r}
#While accounting for Stage, NRAS mutation, TIL score and EventMet
#Subset to only those samples which have non-missing entires for these covariates
clinicalData_sub<-clinicalData[(!is.na(clinicalData$Stage))&(!is.na(clinicalData$NRAS))&(!is.na(clinicalData$ClarkScore))&(!is.na(clinicalData$EventMet)), ]
data.f_sub<-data.f[, rownames(clinicalData_sub)]
```

```{r}
table(clinicalData_sub$SignatureGroupMean)
```

```{r}
dds1 <- DESeqDataSetFromMatrix(countData = data.f_sub,
                               colData   = clinicalData_sub,
                               design    = ~ Stage+NRAS+ClarkScore+EventMet+SignatureGroupMean)
```

```{r}
totalcounts1.gene = rowSums(counts(dds1))
dds1 <- dds1[totalcounts1.gene>=10,]
```

```{r}
# dimensions post filtering
m = nrow(dds1)
n = ncol(dds1)
```

```{r}
dds1 <- DESeq(dds1)
res1 <- results(dds1)
res1
```
```{r}
#Merge with gene names and check if the differentially expressed genes make sense
res<-data.frame(res1)
res<-res[order(res$padj), ]
res$Name<-res.annot[rownames(res), "Name"]
head(res)
#GRAMD1B is differentially expressed in GRAMD1B groups and is downregulated in the low expression group compared to the high-expression group so atleast this makes sense.
```
#Perform lfc shrinkage for GSEA
```{r}
library("apeglm")
coefName = resultsNames(dds1)
resWithLfcShrinkage = data.frame(lfcShrink(dds1, coef = tail(coefName, n=1), type="apeglm"))
#Add suffix to colnames for easy identification later on.
colnames(resWithLfcShrinkage) = paste(colnames(resWithLfcShrinkage),"lfcShrinkApplied", sep = "_")
#Add ENSEMBL ID as a column in the data frame to ease merging later
resWithLfcShrinkage$id = rownames(resWithLfcShrinkage)
```

```{r}
#Merge these with DE original DE results
res$id<-rownames(res)
deResultsUpdated = merge(x = res, y = data.frame(resWithLfcShrinkage), by = "id")
deResultsUpdated = deResultsUpdated[order(deResultsUpdated$padj),]
#Save the results
write.table(deResultsUpdated[deResultsUpdated$padj_lfcShrinkApplied<0.1, ], file = "../results/de_gsea/deWithLfcSkrinkageApplied_GRAMD1B_all.tsv", col.names = TRUE, row.names = FALSE, quote = FALSE, sep = '\t')
#Save the differentially expressed genes (padj<0.1)
write.table(deResultsUpdated[deResultsUpdated$padj_lfcShrinkApplied<0.1, ], file = "../results/de_gsea/deWithLfcSkrinkageApplied_GRAMD1B_padj_0_1.tsv", col.names = TRUE, row.names = FALSE, quote = FALSE, sep = '\t')
```

```{r}
head(deResultsUpdated)
```

#Create a ranked list for GSEA
```{r}
#Create ranked list for running Pre-ranked GSEA tool
rankedList = na.omit(data.frame(deResultsUpdated$Name, deResultsUpdated$log2FoldChange_lfcShrinkApplied))
rankedList = rankedList[with(rankedList, order(deResultsUpdated.log2FoldChange_lfcShrinkApplied)), ]
#Save the ranked list to a new file for GSEA.
write.table(rankedList, file = "../results/de_gsea/rankedList_GRAMD1B.rnk", col.names = FALSE, row.names = FALSE, quote = FALSE, sep = '\t')
```

#Plot survival curves between the continuous signature and the two groups
## First add necesssary columns
```{r}
suppressPackageStartupMessages(library("survival"))
# survival outcome 1: "d" for death and "ltrc" for left truncated and right censored
clinicalData$survival_d_ltrc = Surv(time  = as.numeric(difftime(clinicalData$DOE, clinicalData$DDiag))/365.25,
                                    time2 = as.numeric(difftime(clinicalData$DOC, clinicalData$DDiag))/365.25,
                                    event = ifelse(clinicalData$Dead == FALSE, 0, 1))

# survival outcome 2: "rd" for relapse or death, "ltrc" for left truncated and right censored
clinicalData$survival_rd_ltrc = Surv(time  = as.numeric(difftime(clinicalData$DOE, clinicalData$DDiag))/365.25,
                                     time2 = as.numeric(difftime(apply(clinicalData[, c("DOC", "DDistMets")],1,min,na.rm=TRUE), clinicalData$DDiag))/365.25,
                                     event = ifelse((clinicalData$Dead == FALSE & clinicalData$EventMet == "No"), 0, 1))

if(tissue=="Skin"){
  jt_ns_sm_scores = read.table("../JT_NS_SM_Combined.xls - JT_NS_SM.tsv", sep = "\t", header = TRUE, quote = "")
  combinedScore <- data.frame("Scanned_file.me"= jt_ns_sm_scores$JT_Scanned_file.me, 
                              "R.Seq_sampleID"= jt_ns_sm_scores$JT_R.Seq_sampleID, 
                              "ClarkScore"=ifelse(jt_ns_sm_scores$JT_Clark_score==jt_ns_sm_scores$NS_Clark_score,
                                                  jt_ns_sm_scores$JT_Clark_score, 
                                                  jt_ns_sm_scores$SM_Clark_score),
                           "MIAScore"= ifelse(jt_ns_sm_scores$JT_TIL_grade==jt_ns_sm_scores$NS_TIL.GRADE,
                                              jt_ns_sm_scores$JT_TIL_grade, 
                                              jt_ns_sm_scores$SM_TIL_grade))
  #Remove duplicated entries
  combinedScore<-combinedScore[!duplicated(combinedScore$R.Seq_sampleID), ]
  rownames(combinedScore)<-combinedScore$R.Seq_sampleID #rename rownames with sample ids
  combinedScore$ClarkScore <- as.factor(combinedScore$ClarkScore)
  levels(combinedScore$ClarkScore) <- c("absent", "nonbrisk", "brisk")
  clinicalData<-merge(clinicalData, combinedScore, by.x="RNA.Seq.Sample", by.y="R.Seq_sampleID")
  rownames(clinicalData)<-clinicalData$RNA.Seq.Sample
}
```

## then calculate the values
```{r}
os<-data.frame()
pfs<-data.frame()
temp<-data.frame()

combinations<-c("Signature", "SignatureGroupMean", "SignatureGroupMedian", "SignatureGroup0_33", "SignatureGroup0_66", "SignatureGroupDensityIntersection")

for (combination in combinations) {
  if (tissue=="Skin") {
    os_formula<-paste0("survival_d_ltrc~",combination,"+Sex+Age+as.numeric(Nclass)+as.character(Stage)+ECOG+treatment+ClarkScore")
    pfs_formula<-paste0("survival_rd_ltrc~",combination,"+Sex+Age+as.numeric(Nclass)+as.character(Stage)+ECOG+treatment+ClarkScore")
  } else{
    os_formula<-paste0("survival_d_ltrc~",combination,"+Sex+Age+as.numeric(Nclass)+as.character(Stage)+ECOG+treatment")
    pfs_formula<-paste0("survival_rd_ltrc~",combination,"+Sex+Age+as.numeric(Nclass)+as.character(Stage)+ECOG+treatment")
  }
  
  fit = coef(summary(coxph(as.formula(os_formula), data=clinicalData)))
  mid  = fit[1,c("exp(coef)")]
  low  = exp(fit[1,c("coef")]-
              qnorm(.975)*fit[1,c("se(coef)")])                   
  high = exp(fit[1,c("coef")]+
              qnorm(.975)*fit[1,c("se(coef)")])  
  pval = fit[1,c("Pr(>|z|)")]
  temp["Signature",c("Signature", "HR","low","high","pval")] = c(rownames(fit)[1],mid,low,high,pval)
  os<-rbind(os, temp)
  
  fit = coef(summary(coxph(as.formula(pfs_formula), data=clinicalData)))
  mid  = fit[1,c("exp(coef)")]
  low  = exp(fit[1,c("coef")]-
              qnorm(.975)*fit[1,c("se(coef)")])                   
  high = exp(fit[1,c("coef")]+
              qnorm(.975)*fit[1,c("se(coef)")])  
  pval = fit[1,c("Pr(>|z|)")]
  temp["Signature",c("Signature", "HR","low","high","pval")] = c(rownames(fit)[1],mid,low,high,pval)
  pfs<-rbind(pfs, temp)
}

write.table(os, paste("../results/survival/AVAST-M_",tissue,"_os.tsv", sep=""), sep="\t", col.names = T, row.names = F)
write.table(pfs, paste("../results/survival/AVAST-M_",tissue,"_pfs.tsv", sep=""), sep="\t", col.names = T, row.names = F)
```
